Huff 중력모델을 기반으로 영향권 분석 -> 상권 분석 등에 활용
import math
import geopandas as gpd
import pandas as pd
from fiona.crs import from_epsg # 좌표변환
import contextily as ctx
# 거리를 계산하기 위한 라이브러리 로드
from geopy.distance import great_circle
import matplotlib
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
# Global plot styling applied to the charts in this notebook.
sns.set_style('darkgrid') # other seaborn styles: whitegrid, dark, white, ticks
mpl.rc('font', family='NanumGothic') # font family set for Korean text
plt.rcParams["figure.figsize"] = (16,4.5) # default chart size
http://data.seoul.go.kr/dataList/OA-16096/S/1/datasetView.do
# Load the Seoul large-scale store licensing records (file is read as CP949),
# then show the schema and the first two rows.
df_mart01 = pd.read_csv('data/서울특별시 대규모점포 인허가 정보.csv', encoding='cp949')
df_mart01.info()
df_mart01.head(2)
<class 'pandas.core.frame.DataFrame'> RangeIndex: 937 entries, 0 to 936 Data columns (total 26 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 개방자치단체코드 937 non-null int64 1 관리번호 937 non-null int64 2 인허가일자 937 non-null int64 3 인허가취소일자 7 non-null float64 4 영업상태코드 937 non-null int64 5 영업상태명 937 non-null object 6 상세영업상태코드 937 non-null object 7 상세영업상태명 929 non-null object 8 폐업일자 143 non-null float64 9 휴업시작일자 17 non-null float64 10 휴업종료일자 17 non-null float64 11 재개업일자 34 non-null float64 12 전화번호 922 non-null object 13 소재지면적 862 non-null float64 14 소재지우편번호 408 non-null float64 15 지번주소 815 non-null object 16 도로명주소 762 non-null object 17 도로명우편번호 581 non-null float64 18 사업장명 937 non-null object 19 최종수정일자 937 non-null int64 20 데이터갱신구분 937 non-null object 21 데이터갱신일자 937 non-null object 22 업태구분명 933 non-null object 23 좌표정보(X) 863 non-null float64 24 좌표정보(Y) 863 non-null float64 25 점포구분명 559 non-null object dtypes: float64(10), int64(5), object(11) memory usage: 190.5+ KB
| 개방자치단체코드 | 관리번호 | 인허가일자 | 인허가취소일자 | 영업상태코드 | 영업상태명 | 상세영업상태코드 | 상세영업상태명 | 폐업일자 | 휴업시작일자 | ... | 도로명주소 | 도로명우편번호 | 사업장명 | 최종수정일자 | 데이터갱신구분 | 데이터갱신일자 | 업태구분명 | 좌표정보(X) | 좌표정보(Y) | 점포구분명 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 3000000 | 2019300020207500001 | 20190917 | NaN | 1 | 영업/정상 | 1 | 정상영업 | NaN | NaN | ... | 서울특별시 종로구 삼일대로 428 낙원상가 506호 (낙원동) | 3140.0 | 낙원상가 | 20190925150602 | U | 2019-09-27 02:40:00.0 | 그 밖의 대규모점포 | 198838.912966 | 452214.210962 | 대규모점포 |
| 1 | 3000000 | 2019300020207500002 | 20190917 | NaN | 1 | 영업/정상 | 1 | 정상영업 | NaN | NaN | ... | 서울특별시 종로구 삼일대로 428 낙원상가 (낙원동) | 3140.0 | 낙원상가 | 20190925150506 | I | 2019-09-27 02:22:37.0 | 그 밖의 대규모점포 | 198838.912966 | 452214.210962 | 대규모점포 |
2 rows × 26 columns
# Keep only large-scale stores that are currently operating (영업상태코드 == 1).
# .copy() detaches the result from df_mart01, so later in-place edits of
# df_mart02 do not raise SettingWithCopyWarning.
# Optional stricter filter (currently disabled): also require
# 업태구분명 to be one of ['대형마트', '쇼핑센터', '백화점'].
df_mart02 = df_mart01[df_mart01.영업상태코드 == 1].copy()
df_mart02.info()
df_mart02.head(2)
<class 'pandas.core.frame.DataFrame'> Int64Index: 707 entries, 0 to 936 Data columns (total 26 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 개방자치단체코드 707 non-null int64 1 관리번호 707 non-null int64 2 인허가일자 707 non-null int64 3 인허가취소일자 0 non-null float64 4 영업상태코드 707 non-null int64 5 영업상태명 707 non-null object 6 상세영업상태코드 707 non-null object 7 상세영업상태명 699 non-null object 8 폐업일자 0 non-null float64 9 휴업시작일자 4 non-null float64 10 휴업종료일자 4 non-null float64 11 재개업일자 32 non-null float64 12 전화번호 699 non-null object 13 소재지면적 652 non-null float64 14 소재지우편번호 335 non-null float64 15 지번주소 608 non-null object 16 도로명주소 626 non-null object 17 도로명우편번호 498 non-null float64 18 사업장명 707 non-null object 19 최종수정일자 707 non-null int64 20 데이터갱신구분 707 non-null object 21 데이터갱신일자 707 non-null object 22 업태구분명 706 non-null object 23 좌표정보(X) 657 non-null float64 24 좌표정보(Y) 657 non-null float64 25 점포구분명 449 non-null object dtypes: float64(10), int64(5), object(11) memory usage: 149.1+ KB
| 개방자치단체코드 | 관리번호 | 인허가일자 | 인허가취소일자 | 영업상태코드 | 영업상태명 | 상세영업상태코드 | 상세영업상태명 | 폐업일자 | 휴업시작일자 | ... | 도로명주소 | 도로명우편번호 | 사업장명 | 최종수정일자 | 데이터갱신구분 | 데이터갱신일자 | 업태구분명 | 좌표정보(X) | 좌표정보(Y) | 점포구분명 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 3000000 | 2019300020207500001 | 20190917 | NaN | 1 | 영업/정상 | 1 | 정상영업 | NaN | NaN | ... | 서울특별시 종로구 삼일대로 428 낙원상가 506호 (낙원동) | 3140.0 | 낙원상가 | 20190925150602 | U | 2019-09-27 02:40:00.0 | 그 밖의 대규모점포 | 198838.912966 | 452214.210962 | 대규모점포 |
| 1 | 3000000 | 2019300020207500002 | 20190917 | NaN | 1 | 영업/정상 | 1 | 정상영업 | NaN | NaN | ... | 서울특별시 종로구 삼일대로 428 낙원상가 (낙원동) | 3140.0 | 낙원상가 | 20190925150506 | I | 2019-09-27 02:22:37.0 | 그 밖의 대규모점포 | 198838.912966 | 452214.210962 | 대규모점포 |
2 rows × 26 columns
# Use only rows that have both a floor area and X/Y coordinates.
# dropna() is called without inplace=True so it returns a new frame instead of
# mutating a filtered slice (the source of SettingWithCopyWarning), and the
# re-indexed frame is assigned back so the index reset actually takes effect.
df_mart02 = df_mart02.dropna(
    subset=['소재지면적', '좌표정보(X)', '좌표정보(Y)'], axis=0
).reset_index(drop=True)
df_mart02.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 637 entries, 0 to 936 Data columns (total 26 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 개방자치단체코드 637 non-null int64 1 관리번호 637 non-null int64 2 인허가일자 637 non-null int64 3 인허가취소일자 0 non-null float64 4 영업상태코드 637 non-null int64 5 영업상태명 637 non-null object 6 상세영업상태코드 637 non-null object 7 상세영업상태명 632 non-null object 8 폐업일자 0 non-null float64 9 휴업시작일자 4 non-null float64 10 휴업종료일자 4 non-null float64 11 재개업일자 28 non-null float64 12 전화번호 632 non-null object 13 소재지면적 637 non-null float64 14 소재지우편번호 330 non-null float64 15 지번주소 541 non-null object 16 도로명주소 607 non-null object 17 도로명우편번호 492 non-null float64 18 사업장명 637 non-null object 19 최종수정일자 637 non-null int64 20 데이터갱신구분 637 non-null object 21 데이터갱신일자 637 non-null object 22 업태구분명 636 non-null object 23 좌표정보(X) 637 non-null float64 24 좌표정보(Y) 637 non-null float64 25 점포구분명 441 non-null object dtypes: float64(10), int64(5), object(11) memory usage: 134.4+ KB
<ipython-input-127-b550982e086b>:2: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy df_mart02.dropna(subset=['소재지면적'], axis=0, inplace=True) <ipython-input-127-b550982e086b>:3: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy df_mart02.dropna(subset=['좌표정보(X)'], axis=0, inplace=True) <ipython-input-127-b550982e086b>:4: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy df_mart02.dropna(subset=['좌표정보(Y)'], axis=0, inplace=True)
# Summary statistics of the floor-area column, used to choose the size cut-off below.
df_mart02.소재지면적.describe()
count 637.000000 mean 9054.809937 std 15856.622852 min 0.000000 25% 476.000000 50% 3386.040000 75% 10269.890000 max 145244.540000 Name: 소재지면적, dtype: float64
# Floor area has a mean of about 9000 and a median of 3386, so many small
# stores are included. Keep only stores whose area is at least 10270, which is
# roughly the 75th percentile reported by describe().
df_mart02 = df_mart02.loc[df_mart02['소재지면적'] >= 10270]
df_mart02.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 159 entries, 0 to 935 Data columns (total 26 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 개방자치단체코드 159 non-null int64 1 관리번호 159 non-null int64 2 인허가일자 159 non-null int64 3 인허가취소일자 0 non-null float64 4 영업상태코드 159 non-null int64 5 영업상태명 159 non-null object 6 상세영업상태코드 159 non-null object 7 상세영업상태명 157 non-null object 8 폐업일자 0 non-null float64 9 휴업시작일자 2 non-null float64 10 휴업종료일자 2 non-null float64 11 재개업일자 7 non-null float64 12 전화번호 159 non-null object 13 소재지면적 159 non-null float64 14 소재지우편번호 85 non-null float64 15 지번주소 143 non-null object 16 도로명주소 153 non-null object 17 도로명우편번호 114 non-null float64 18 사업장명 159 non-null object 19 최종수정일자 159 non-null int64 20 데이터갱신구분 159 non-null object 21 데이터갱신일자 159 non-null object 22 업태구분명 159 non-null object 23 좌표정보(X) 159 non-null float64 24 좌표정보(Y) 159 non-null float64 25 점포구분명 104 non-null object dtypes: float64(10), int64(5), object(11) memory usage: 33.5+ KB
# Keep only the columns needed downstream and give them short English names.
mart_column_names = {
    '관리번호': 'mart_id',
    '소재지면적': 'mart_area',
    '사업장명': 'mart_nm',
    '업태구분명': 'mart_ty',
    '좌표정보(X)': 'mart_x',
    '좌표정보(Y)': 'mart_y',
}
df_mart03 = df_mart02[list(mart_column_names)].rename(columns=mart_column_names)
df_mart03.info()
df_mart03.head()
<class 'pandas.core.frame.DataFrame'> Int64Index: 159 entries, 0 to 935 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 mart_id 159 non-null int64 1 mart_area 159 non-null float64 2 mart_nm 159 non-null object 3 mart_ty 159 non-null object 4 mart_x 159 non-null float64 5 mart_y 159 non-null float64 dtypes: float64(3), int64(1), object(2) memory usage: 8.7+ KB
| mart_id | mart_area | mart_nm | mart_ty | mart_x | mart_y | |
|---|---|---|---|---|---|---|
| 0 | 2019300020207500001 | 25198.02 | 낙원상가 | 그 밖의 대규모점포 | 198838.912966 | 452214.210962 |
| 1 | 2019300020207500002 | 25198.02 | 낙원상가 | 그 밖의 대규모점포 | 198838.912966 | 452214.210962 |
| 2 | 2003300012907500001 | 16128.75 | 광장주식회사 | 시장 | 199837.414267 | 451974.128468 |
| 6 | 2004300007607500018 | 11750.00 | 동대문종합시장D동상가 | 그 밖의 대규모점포 | 200579.409755 | 452035.347659 |
| 13 | 2012300012907500001 | 16128.75 | 광장시장 | 복합쇼핑몰 | 199837.414267 | 451974.128468 |
# Turn the store table into spatial data: build points from the X/Y columns
# (declared as EPSG:2097), then reproject to EPSG:4326 and EPSG:3857.
df_mart_geom = gpd.points_from_xy(df_mart03.mart_x, df_mart03.mart_y)
columns = df_mart03.columns
gdf_mart2097 = gpd.GeoDataFrame(df_mart03[columns], geometry=df_mart_geom, crs=from_epsg(2097))
gdf_mart4326 = gdf_mart2097.to_crs(epsg=4326)
# Reproject from gdf_mart2097: the previously referenced gdf_mart5179 is never
# defined in this notebook and raised NameError.
gdf_mart3857 = gdf_mart2097.to_crs(epsg=3857)
C:\Users\dhl\AppData\Roaming\Python\Python38\site-packages\pyproj\crs\crs.py:53: FutureWarning: '+init=<authority>:<code>' syntax is deprecated. '<authority>:<code>' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6
return _prepare_from_string(" ".join(pjargs))
# Visual check of the store locations on a basemap (Web Mercator layer).
ax = gdf_mart3857.plot(markersize=7, figsize=(10, 10))
ctx.add_basemap(ax, source=ctx.sources.ST_TERRAIN) # alternative source: ctx.sources.ST_TONER_LITE
ax.set_axis_off()
plt.show()
<ipython-input-137-37ce67be6df6>:3: FutureWarning: The "contextily.tile_providers" module is deprecated and will be removed in contextily v1.1. Please use "contextily.providers" instead. ctx.add_basemap(ax, source=ctx.sources.ST_TERRAIN) #ctx.sources.ST_TONER_LITE
# Derive the store X/Y columns from the EPSG:4326 point geometry
# (x is stored as mart_lon, y as mart_lat).
gdf_mart4326["mart_lon"] = gdf_mart4326.geometry.x
gdf_mart4326["mart_lat"] = gdf_mart4326.geometry.y
http://data.seoul.go.kr/dataList/OA-14979/F/1/datasetView.do
# Load the 집계구 (census output area) polygon layer from the shapefile,
# then show its schema and first three rows.
gdf_jip5179 = gpd.read_file('data/admin_sl_2016/집계구.shp')
gdf_jip5179.info()
gdf_jip5179.head(3)
<class 'geopandas.geodataframe.GeoDataFrame'> RangeIndex: 19153 entries, 0 to 19152 Data columns (total 4 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 TOT_REG_CD 19153 non-null object 1 ADM_NM 19153 non-null object 2 ADM_CD 19153 non-null object 3 geometry 19153 non-null geometry dtypes: geometry(1), object(3) memory usage: 598.7+ KB
| TOT_REG_CD | ADM_NM | ADM_CD | geometry | |
|---|---|---|---|---|
| 0 | 1101053010006 | 사직동 | 1101053 | POLYGON ((953231.968 1953435.866, 953237.202 1... |
| 1 | 1101053010001 | 사직동 | 1101053 | POLYGON ((952858.566 1952976.274, 952875.055 1... |
| 2 | 1101053010003 | 사직동 | 1101053 | POLYGON ((952889.178 1953405.246, 952890.836 1... |
# Declare the layer's CRS as EPSG:5179 (an assignment of the CRS attribute),
# then create reprojected copies for plotting (3857) and lon/lat work (4326).
gdf_jip5179.crs = "epsg:5179"
gdf_jip3857 = gdf_jip5179.to_crs(epsg=3857)
gdf_jip4326 = gdf_jip5179.to_crs(epsg=4326)
# Draw the 집계구 polygons over a basemap as a visual check.
ax = gdf_jip3857.plot(color='gray', figsize=(12, 12), alpha=0.6)
ctx.add_basemap(ax, source=ctx.sources.ST_TERRAIN) # alternative source: ctx.sources.ST_TONER_LITE
ax.set_axis_off()
plt.show()
<ipython-input-13-1af1b8e2140b>:2: FutureWarning: The "contextily.tile_providers" module is deprecated and will be removed in contextily v1.1. Please use "contextily.providers" instead. ctx.add_basemap(ax, source=ctx.sources.ST_TERRAIN) #ctx.sources.ST_TONER_LITE
# Load the per-집계구 living-population data for July 15 (file LOCAL_PEOPLE_20210715.csv).
# header=0 together with names= replaces the file's own header row with the
# names below; only hour, TOT_REG_CD and pop_all are kept.
# TOT_REG_CD is read as object so it is not parsed as a number.
df_pop01 = pd.read_csv('data/LOCAL_PEOPLE_20210715.csv', encoding='cp949', header=0,
names=['dt', 'hour', 'hdong', 'TOT_REG_CD', 'pop_all', 'm0', 'm10', 'm15', 'm20', 'm25', 'm30', 'm35', 'm40', 'm45', 'm50', 'm55', 'm60', 'm65', 'm70', 'w0', 'w10', 'w15', 'w20', 'w25', 'w30', 'w35', 'w40', 'w45', 'w50', 'w55', 'w60', 'w65', 'w70'],
usecols=['hour', 'TOT_REG_CD', 'pop_all'],
dtype={'hour':float, 'TOT_REG_CD':object, 'pop_all':float}
)
df_pop01.head(3)
| hour | TOT_REG_CD | pop_all | |
|---|---|---|---|
| 0 | 17.0 | 1115060030401 | 153.4582 |
| 1 | 1.0 | 1115060040001 | 3.9789 |
| 2 | 9.0 | 1110060020003 | 10.7129 |
# Sum the total living population (pop_all) per 집계구 over the hours 10 through 20.
# Series.between() is inclusive on both ends by default.
df_pop02 = df_pop01[df_pop01.hour.between(10, 20)]
df_pop03 = df_pop02.groupby('TOT_REG_CD', as_index=False)['pop_all'].sum()
df_pop03['TOT_REG_CD'] = df_pop03['TOT_REG_CD'].astype('object')
df_pop03.info()
df_pop03.head()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 18953 entries, 0 to 18952 Data columns (total 2 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 TOT_REG_CD 18953 non-null object 1 pop_all 18953 non-null float64 dtypes: float64(1), object(1) memory usage: 296.3+ KB
| TOT_REG_CD | pop_all | |
|---|---|---|
| 0 | 1101053010001 | 3828.8897 |
| 1 | 1101053010002 | 20118.0878 |
| 2 | 1101053010003 | 22248.8882 |
| 3 | 1101053010004 | 21798.5802 |
| 4 | 1101053010005 | 16343.3748 |
# Attribute join: attach the per-집계구 living population to both polygon
# layers. A left join keeps every polygon even when it has no population row.
gdf_jip4326m = gdf_jip4326.merge(df_pop03, how='left', on='TOT_REG_CD')
gdf_jip3857m = gdf_jip3857.merge(df_pop03, how='left', on='TOT_REG_CD')
gdf_jip4326m.tail()
| TOT_REG_CD | ADM_NM | ADM_CD | geometry | pop_all | |
|---|---|---|---|---|---|
| 19148 | 1102073010501 | 동화동 | 1102073 | POLYGON ((127.01764 37.56250, 127.01763 37.562... | 1221.9702 |
| 19149 | 1102055030001 | 명동 | 1102055 | POLYGON ((126.98893 37.56810, 126.98895 37.568... | 121202.7230 |
| 19150 | 1102058040004 | 장충동 | 1102058 | POLYGON ((127.00338 37.55903, 127.00344 37.559... | 48856.1133 |
| 19151 | 1121052010002 | 보라매동 | 1121052 | POLYGON ((126.92720 37.49429, 126.92709 37.493... | 23755.7097 |
| 19152 | 1121052010001 | 보라매동 | 1121052 | POLYGON ((126.92591 37.49144, 126.92596 37.491... | 29766.9029 |
# Choropleth of the 10-20h living population per 집계구
# (Fisher-Jenks classification, 5 classes).
ax = gdf_jip3857m.plot(column='pop_all', scheme='Fisher_Jenks', k=5, cmap='Oranges_r', figsize=(15, 15), alpha=0.7)
ctx.add_basemap(ax, source=ctx.sources.ST_TONER_LITE) # basemap source: ST_TONER_LITE
ax.set_axis_off()
plt.show()
<ipython-input-18-4e849bace3ce>:3: FutureWarning: The "contextily.tile_providers" module is deprecated and will be removed in contextily v1.1. Please use "contextily.providers" instead. ctx.add_basemap(ax, source=ctx.sources.ST_TONER_LITE) #ctx.sources.ST_TONER_LITE
# Replace each 집계구 polygon with its centroid and expose the centroid as
# jip_lon / jip_lat columns.
# The centroid is computed in the projected CRS (EPSG:5179, the layer's source
# CRS) and then converted back to EPSG:4326, because geopandas warns that
# centroids taken directly on geographic (lon/lat) geometries are likely incorrect.
gdf_jip4326m_pt = gdf_jip4326m.copy()
centroids_5179 = gdf_jip4326m_pt.geometry.to_crs(epsg=5179).centroid
gdf_jip4326m_pt['geometry'] = centroids_5179.to_crs(epsg=4326)
gdf_jip4326m_pt["jip_lon"] = gdf_jip4326m_pt.geometry.x
gdf_jip4326m_pt["jip_lat"] = gdf_jip4326m_pt.geometry.y
gdf_jip4326m_pt.head()
<ipython-input-39-0f00ed413452>:3: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation. gdf_jip4326m_pt['geometry'] = gdf_jip4326m_pt['geometry'].centroid
| TOT_REG_CD | ADM_NM | ADM_CD | geometry | pop_all | jip_lon | jip_lat | |
|---|---|---|---|---|---|---|---|
| 0 | 1101053010006 | 사직동 | 1101053 | POINT (126.97031 37.57830) | 4423.9508 | 126.970311 | 37.578304 |
| 1 | 1101053010001 | 사직동 | 1101053 | POINT (126.96518 37.57415) | 3828.8897 | 126.965178 | 37.574151 |
| 2 | 1101053010003 | 사직동 | 1101053 | POINT (126.96551 37.57651) | 22248.8882 | 126.965513 | 37.576513 |
| 3 | 1101053010002 | 사직동 | 1101053 | POINT (126.96900 37.57709) | 20118.0878 | 126.969002 | 37.577092 |
| 4 | 1101053010005 | 사직동 | 1101053 | POINT (126.97277 37.57803) | 16343.3748 | 126.972772 | 37.578025 |
# Cross join every 집계구 centroid with every store through a constant key.
# NOTE: the result has (#areas x #stores) rows, so it can become very large.
gdf_jip4326m_pt['key'] = 1
gdf_mart4326['key'] = 1
merged = gdf_jip4326m_pt.merge(gdf_mart4326, on='key')
# Straight-line (great-circle) distance in km between each centroid and store.
# Iterating the four coordinate columns with zip() yields the same values as
# DataFrame.apply(axis=1) without constructing a Series object for every row.
merged['dist'] = [
    great_circle((jip_lat, jip_lon), (mart_lat, mart_lon)).kilometers
    for jip_lat, jip_lon, mart_lat, mart_lon in zip(
        merged['jip_lat'], merged['jip_lon'], merged['mart_lat'], merged['mart_lon']
    )
]
merged.shape
(3045327, 18)
# Preview the cross-joined (집계구 x store) table including the dist column.
merged.head()
| TOT_REG_CD | ADM_NM | ADM_CD | geometry_x | pop_all | jip_lon | jip_lat | dist | key | mart_id | mart_area | mart_nm | mart_ty | mart_x | mart_y | geometry_y | mart_lon | mart_lat | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1101053010006 | 사직동 | 1101053 | POINT (126.97031 37.57830) | 4423.9508 | 126.970311 | 37.578304 | 1.441665 | 1 | 2019300020207500001 | 25198.02 | 낙원상가 | 그 밖의 대규모점포 | 198838.912966 | 452214.210962 | POINT (126.98475 37.57222) | 126.984755 | 37.572217 |
| 1 | 1101053010006 | 사직동 | 1101053 | POINT (126.97031 37.57830) | 4423.9508 | 126.970311 | 37.578304 | 1.441665 | 1 | 2019300020207500002 | 25198.02 | 낙원상가 | 그 밖의 대규모점포 | 198838.912966 | 452214.210962 | POINT (126.98475 37.57222) | 126.984755 | 37.572217 |
| 2 | 1101053010006 | 사직동 | 1101053 | POINT (126.97031 37.57830) | 4423.9508 | 126.970311 | 37.578304 | 2.447428 | 1 | 2003300012907500001 | 16128.75 | 광장주식회사 | 시장 | 199837.414267 | 451974.128468 | POINT (126.99606 37.57005) | 126.996057 | 37.570055 |
| 3 | 1101053010006 | 사직동 | 1101053 | POINT (126.97031 37.57830) | 4423.9508 | 126.970311 | 37.578304 | 3.128584 | 1 | 2004300007607500018 | 11750.00 | 동대문종합시장D동상가 | 그 밖의 대규모점포 | 200579.409755 | 452035.347659 | POINT (127.00446 37.57061) | 127.004456 | 37.570606 |
| 4 | 1101053010006 | 사직동 | 1101053 | POINT (126.97031 37.57830) | 4423.9508 | 126.970311 | 37.578304 | 2.447428 | 1 | 2012300012907500001 | 16128.75 | 광장시장 | 복합쇼핑몰 | 199837.414267 | 451974.128468 | POINT (126.99606 37.57005) | 126.996057 | 37.570055 |
# Gravity-model attractiveness of each store for each 집계구:
# store floor area divided by the squared distance.
# Plain column arithmetic replaces the row-by-row apply(), which called a
# Python function once per row of the cross-joined table.
merged['attract'] = merged['mart_area'] / merged['dist'] ** 2
merged.head()
| TOT_REG_CD | ADM_NM | ADM_CD | geometry_x | pop_all | jip_lon | jip_lat | dist | key | mart_id | mart_area | mart_nm | mart_ty | mart_x | mart_y | geometry_y | mart_lon | mart_lat | attract | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1101053010006 | 사직동 | 1101053 | POINT (126.97031 37.57830) | 4423.9508 | 126.970311 | 37.578304 | 5.478750 | 1 | 2004302007407500001 | 117912.81 | 아이파크몰(I PARK MALL) | 쇼핑센터 | 196762.077395 | 447480.039577 | POINT (126.96127 37.52956) | 126.961267 | 37.529557 | 3928.239647 |
| 1 | 1101053010006 | 사직동 | 1101053 | POINT (126.97031 37.57830) | 4423.9508 | 126.970311 | 37.578304 | 2.072085 | 1 | 2010301010007500002 | 12585.13 | 신세계백화점메사점 | 쇼핑센터 | 198129.435543 | 450900.239718 | POINT (126.97673 37.56038) | 126.976727 | 37.560377 | 2931.181501 |
| 2 | 1101053010006 | 사직동 | 1101053 | POINT (126.97031 37.57830) | 4423.9508 | 126.970311 | 37.578304 | 3.621467 | 1 | 2010301010007500003 | 8119.00 | 올레오 | 쇼핑센터 | 201040.352596 | 451852.142613 | POINT (127.00967 37.56896) | 127.009674 | 37.568955 | 619.061011 |
| 3 | 1101053010006 | 사직동 | 1101053 | POINT (126.97031 37.57830) | 4423.9508 | 126.970311 | 37.578304 | 1.653768 | 1 | 2012301010007500002 | 59979.25 | 롯데백화점 본점 | 백화점 | 198259.653577 | 451392.198219 | POINT (126.97820 37.56481) | 126.978200 | 37.564810 | 21930.676029 |
| 4 | 1101053010006 | 사직동 | 1101053 | POINT (126.97031 37.57830) | 4423.9508 | 126.970311 | 37.578304 | 2.470420 | 1 | 2012301010007500004 | 11576.19 | 롯데아울렛 서울역점 | 쇼핑센터 | 197230.206090 | 450446.684396 | POINT (126.96655 37.55629) | 126.966551 | 37.556288 | 1896.810663 |
# Debug aid for inspecting the gravity-model values of a single 집계구:
#merged_part = merged[ merged.TOT_REG_CD == 1101053010006 ]
#merged_part.to_csv("data/merged.csv", encoding='euc-kr')
# For each 집계구 keep only the largest store attractiveness.
merged2 = merged.groupby('TOT_REG_CD')['attract'].max().reset_index()
# Untested alternative that would keep the whole winning row instead:
#merged.sort_values('attract').drop_duplicates(['TOT_REG_CD'],keep='last')
merged2.info()
merged2.head()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 19153 entries, 0 to 19152 Data columns (total 2 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 TOT_REG_CD 19153 non-null object 1 attract 19153 non-null float64 dtypes: float64(1), object(1) memory usage: 299.4+ KB
| TOT_REG_CD | attract | |
|---|---|---|
| 0 | 1101053010001 | 25032.015982 |
| 1 | 1101053010002 | 23781.495017 |
| 2 | 1101053010003 | 20374.563266 |
| 3 | 1101053010004 | 29197.524019 |
| 4 | 1101053010005 | 25115.623315 |
# Standardize the attractiveness (StandardScaler: zero mean, unit variance)
# into the attract_sc1 column.
from sklearn.preprocessing import StandardScaler
merged2['attract_sc1'] = StandardScaler().fit_transform(merged2[['attract']])
merged2.head()
| TOT_REG_CD | attract | attract_sc1 | |
|---|---|---|---|
| 0 | 1101053010001 | 25032.015982 | -0.062702 |
| 1 | 1101053010002 | 23781.495017 | -0.063221 |
| 2 | 1101053010003 | 20374.563266 | -0.064637 |
| 3 | 1101053010004 | 29197.524019 | -0.060970 |
| 4 | 1101053010005 | 25115.623315 | -0.062667 |
# Summary statistics of the standardized attractiveness.
merged2['attract_sc1'].describe()
count 1.915300e+04 mean 3.372028e-17 std 1.000026e+00 min -7.256842e-02 25% -6.841745e-02 50% -6.332803e-02 75% -4.839055e-02 max 5.894172e+01 Name: attract_sc1, dtype: float64
# Rescale the standardized attractiveness to the 0-1 range (attract_sc2).
# NOTE(review): min-max scaling is unaffected by a prior shift/positive scale,
# so this equals min-max scaling of the raw attract column.
from sklearn.preprocessing import MinMaxScaler # normalizes to 0~1
merged2['attract_sc2'] = MinMaxScaler().fit_transform(merged2[['attract_sc1']])
merged2.head()
| TOT_REG_CD | attract | attract_sc1 | attract_sc2 | |
|---|---|---|---|---|
| 0 | 1101053010001 | 25032.015982 | -0.062702 | 0.000167 |
| 1 | 1101053010002 | 23781.495017 | -0.063221 | 0.000158 |
| 2 | 1101053010003 | 20374.563266 | -0.064637 | 0.000134 |
| 3 | 1101053010004 | 29197.524019 | -0.060970 | 0.000197 |
| 4 | 1101053010005 | 25115.623315 | -0.062667 | 0.000168 |
# Summary statistics of the 0-1 normalized attractiveness.
merged2.attract_sc2.describe()
count 19153.000000 mean 0.001230 std 0.016945 min 0.000000 25% 0.000070 50% 0.000157 75% 0.000410 max 1.000000 Name: attract_sc2, dtype: float64
# Box plot of the normalized attractiveness to inspect its distribution.
plt.boxplot(merged2.attract_sc2)
plt.show()
# Attribute join: attach the gravity-model result (merged2) to the 집계구
# polygons on TOT_REG_CD. No `how` is given, so pd.merge uses its default join type.
gdf_jip3857m2 = pd.merge(gdf_jip3857m, merged2, on='TOT_REG_CD')
gdf_jip3857m2.info()
<class 'geopandas.geodataframe.GeoDataFrame'> Int64Index: 19153 entries, 0 to 19152 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 TOT_REG_CD 19153 non-null object 1 ADM_NM 19153 non-null object 2 ADM_CD 19153 non-null object 3 geometry 19153 non-null geometry 4 pop_all 18953 non-null float64 5 dist 150 non-null float64 6 attract 19153 non-null float64 7 attract_sc1 19153 non-null float64 8 attract_sc2 19153 non-null float64 dtypes: float64(5), geometry(1), object(3) memory usage: 1.5+ MB
# Preview the polygons with the joined attractiveness columns.
gdf_jip3857m2.head()
| TOT_REG_CD | ADM_NM | ADM_CD | geometry | pop_all | dist | attract | attract_sc1 | attract_sc2 | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 1101053010006 | 사직동 | 1101053 | POLYGON ((14134272.820 4520138.849, 14134279.4... | 4423.9508 | NaN | 0.000145 | -0.063991 | 0.000145 |
| 1 | 1101053010001 | 사직동 | 1101053 | POLYGON ((14133805.368 4519554.335, 14133826.3... | 3828.8897 | NaN | 0.000167 | -0.062702 | 0.000167 |
| 2 | 1101053010003 | 사직동 | 1101053 | POLYGON ((14133840.887 4520097.627, 14133843.1... | 22248.8882 | NaN | 0.000134 | -0.064637 | 0.000134 |
| 3 | 1101053010002 | 사직동 | 1101053 | POLYGON ((14134099.332 4520013.914, 14134099.9... | 20118.0878 | NaN | 0.000158 | -0.063221 | 0.000158 |
| 4 | 1101053010005 | 사직동 | 1101053 | POLYGON ((14134679.424 4520014.380, 14134681.1... | 16343.3748 | NaN | 0.000168 | -0.062667 | 0.000168 |
# Keep only the identifier, geometry, population and attractiveness columns
# (any other column of gdf_jip3857m2 is dropped).
gdf_jip3857m3 = gdf_jip3857m2[['TOT_REG_CD', 'ADM_NM', 'ADM_CD', 'geometry', 'pop_all', 'attract', 'attract_sc1', 'attract_sc2']]
gdf_jip3857m3.head()
| TOT_REG_CD | ADM_NM | ADM_CD | geometry | pop_all | attract | attract_sc1 | attract_sc2 | |
|---|---|---|---|---|---|---|---|---|
| 0 | 1101053010006 | 사직동 | 1101053 | POLYGON ((14134272.820 4520138.849, 14134279.4... | 4423.9508 | 0.000145 | -0.063991 | 0.000145 |
| 1 | 1101053010001 | 사직동 | 1101053 | POLYGON ((14133805.368 4519554.335, 14133826.3... | 3828.8897 | 0.000167 | -0.062702 | 0.000167 |
| 2 | 1101053010003 | 사직동 | 1101053 | POLYGON ((14133840.887 4520097.627, 14133843.1... | 22248.8882 | 0.000134 | -0.064637 | 0.000134 |
| 3 | 1101053010002 | 사직동 | 1101053 | POLYGON ((14134099.332 4520013.914, 14134099.9... | 20118.0878 | 0.000158 | -0.063221 | 0.000158 |
| 4 | 1101053010005 | 사직동 | 1101053 | POLYGON ((14134679.424 4520014.380, 14134681.1... | 16343.3748 | 0.000168 | -0.062667 | 0.000168 |
# Attach the details (store id, name, area, distance, ...) of the most
# attractive store to each 집계구 polygon by joining on `attract`.
# `attract` on its own is not a unique key: stores registered at the same
# location with the same area yield identical values, which duplicated
# polygons in the plain join. Therefore:
#   1. reduce `merged` to a single best row per TOT_REG_CD
#      (NaN sorted first so keep='last' picks the largest real value), and
#   2. after the join, keep only rows whose area code matches on both sides
#      (unmatched left rows are kept, preserving the left-join semantics).
best_mart = merged.sort_values('attract', na_position='first').drop_duplicates('TOT_REG_CD', keep='last')
gdf_jip3857m4 = pd.merge(gdf_jip3857m3, best_mart, on='attract', how='left')
same_area = (
    (gdf_jip3857m4['TOT_REG_CD_x'] == gdf_jip3857m4['TOT_REG_CD_y'])
    | gdf_jip3857m4['TOT_REG_CD_y'].isna()
)
gdf_jip3857m4 = gdf_jip3857m4[same_area].reset_index(drop=True)
gdf_jip3857m4.info()
gdf_jip3857m4.head()
<class 'geopandas.geodataframe.GeoDataFrame'> Int64Index: 19212 entries, 0 to 19211 Data columns (total 26 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 TOT_REG_CD_x 19212 non-null object 1 ADM_NM_x 19212 non-null object 2 ADM_CD_x 19212 non-null object 3 geometry 19212 non-null geometry 4 pop_all_x 19012 non-null float64 5 attract 19212 non-null float64 6 attract_sc1 19212 non-null float64 7 attract_sc2 19212 non-null float64 8 TOT_REG_CD_y 19212 non-null object 9 ADM_NM_y 19212 non-null object 10 ADM_CD_y 19212 non-null object 11 geometry_x 19212 non-null geometry 12 pop_all_y 19012 non-null float64 13 jip_lon 19212 non-null float64 14 jip_lat 19212 non-null float64 15 dist 19212 non-null float64 16 key 19212 non-null int64 17 mart_id 19212 non-null int64 18 mart_area 19212 non-null float64 19 mart_nm 19212 non-null object 20 mart_ty 19212 non-null object 21 mart_x 19212 non-null float64 22 mart_y 19212 non-null float64 23 geometry_y 19212 non-null geometry 24 mart_lon 19212 non-null float64 25 mart_lat 19212 non-null float64 dtypes: float64(13), geometry(3), int64(2), object(8) memory usage: 4.0+ MB
| TOT_REG_CD_x | ADM_NM_x | ADM_CD_x | geometry | pop_all_x | attract | attract_sc1 | attract_sc2 | TOT_REG_CD_y | ADM_NM_y | ... | key | mart_id | mart_area | mart_nm | mart_ty | mart_x | mart_y | geometry_y | mart_lon | mart_lat | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1101053010006 | 사직동 | 1101053 | POLYGON ((14134272.820 4520138.849, 14134279.4... | 4423.9508 | 21930.676029 | -0.063991 | 0.000145 | 1101053010006 | 사직동 | ... | 1 | 2012301010007500002 | 59979.25 | 롯데백화점 본점 | 백화점 | 198259.653577 | 451392.198219 | POINT (126.97820 37.56481) | 126.9782 | 37.56481 |
| 1 | 1101053010001 | 사직동 | 1101053 | POLYGON ((14133805.368 4519554.335, 14133826.3... | 3828.8897 | 25032.015982 | -0.062702 | 0.000167 | 1101053010001 | 사직동 | ... | 1 | 2012301010007500002 | 59979.25 | 롯데백화점 본점 | 백화점 | 198259.653577 | 451392.198219 | POINT (126.97820 37.56481) | 126.9782 | 37.56481 |
| 2 | 1101053010003 | 사직동 | 1101053 | POLYGON ((14133840.887 4520097.627, 14133843.1... | 22248.8882 | 20374.563266 | -0.064637 | 0.000134 | 1101053010003 | 사직동 | ... | 1 | 2012301010007500002 | 59979.25 | 롯데백화점 본점 | 백화점 | 198259.653577 | 451392.198219 | POINT (126.97820 37.56481) | 126.9782 | 37.56481 |
| 3 | 1101053010002 | 사직동 | 1101053 | POLYGON ((14134099.332 4520013.914, 14134099.9... | 20118.0878 | 23781.495017 | -0.063221 | 0.000158 | 1101053010002 | 사직동 | ... | 1 | 2012301010007500002 | 59979.25 | 롯데백화점 본점 | 백화점 | 198259.653577 | 451392.198219 | POINT (126.97820 37.56481) | 126.9782 | 37.56481 |
| 4 | 1101053010005 | 사직동 | 1101053 | POLYGON ((14134679.424 4520014.380, 14134681.1... | 16343.3748 | 25115.623315 | -0.062667 | 0.000168 | 1101053010005 | 사직동 | ... | 1 | 2012301010007500002 | 59979.25 | 롯데백화점 본점 | 백화점 | 198259.653577 | 451392.198219 | POINT (126.97820 37.56481) | 126.9782 | 37.56481 |
5 rows × 26 columns
# Final result layer: pick the polygon-side columns plus the winning store's
# details, and rename them to short lowercase names.
result_column_names = {
    'TOT_REG_CD_x': 'tot_reg_cd',
    'ADM_NM_x': 'adm_nm',
    'ADM_CD_x': 'adm_cd',
    'geometry': 'geometry',
    'pop_all_x': 'pop_all',
    'attract': 'attract',
    'attract_sc1': 'attract_sc1',
    'attract_sc2': 'attract_sc2',
    'dist': 'dist',
    'mart_id': 'mart_id',
    'mart_area': 'mart_area',
    'mart_nm': 'mart_nm',
}
gdf_jip3857m5 = gdf_jip3857m4[list(result_column_names)].rename(columns=result_column_names)
gdf_jip3857m5.head()
| tot_reg_cd | adm_nm | adm_cd | geometry | pop_all | attract | attract_sc1 | attract_sc2 | dist | mart_id | mart_area | mart_nm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1101053010006 | 사직동 | 1101053 | POLYGON ((14134272.820 4520138.849, 14134279.4... | 4423.9508 | 21930.676029 | -0.063991 | 0.000145 | 1.653768 | 2012301010007500002 | 59979.25 | 롯데백화점 본점 |
| 1 | 1101053010001 | 사직동 | 1101053 | POLYGON ((14133805.368 4519554.335, 14133826.3... | 3828.8897 | 25032.015982 | -0.062702 | 0.000167 | 1.547935 | 2012301010007500002 | 59979.25 | 롯데백화점 본점 |
| 2 | 1101053010003 | 사직동 | 1101053 | POLYGON ((14133840.887 4520097.627, 14133843.1... | 22248.8882 | 20374.563266 | -0.064637 | 0.000134 | 1.715759 | 2012301010007500002 | 59979.25 | 롯데백화점 본점 |
| 3 | 1101053010002 | 사직동 | 1101053 | POLYGON ((14134099.332 4520013.914, 14134099.9... | 20118.0878 | 23781.495017 | -0.063221 | 0.000158 | 1.588111 | 2012301010007500002 | 59979.25 | 롯데백화점 본점 |
| 4 | 1101053010005 | 사직동 | 1101053 | POLYGON ((14134679.424 4520014.380, 14134681.1... | 16343.3748 | 25115.623315 | -0.062667 | 0.000168 | 1.545356 | 2012301010007500002 | 59979.25 | 롯데백화점 본점 |
# Preview the last rows of the final result layer.
gdf_jip3857m5.tail()
| tot_reg_cd | adm_nm | adm_cd | geometry | pop_all | attract | attract_sc1 | attract_sc2 | dist | mart_id | mart_area | mart_nm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 19207 | 1102073010501 | 동화동 | 1102073 | POLYGON ((14139538.793 4517804.641, 14139537.7... | 1221.9702 | 63946.892311 | -0.046528 | 0.000441 | 0.473384 | 2002301007107500008 | 14330.00 | 서울중앙시장 |
| 19208 | 1102055030001 | 명동 | 1102055 | POLYGON ((14136342.720 4518591.610, 14136344.7... | 121202.7230 | 101597.255067 | -0.030880 | 0.000706 | 0.768351 | 2012301010007500002 | 59979.25 | 롯데백화점 본점 |
| 19209 | 1102058040004 | 장충동 | 1102058 | POLYGON ((14137952.117 4517316.775, 14137958.7... | 48856.1133 | 16165.854358 | -0.066387 | 0.000105 | 1.578934 | 2008301010007500005 | 40302.01 | 굿모닝시티 쇼핑몰 |
| 19210 | 1121052010002 | 보라매동 | 1121052 | POLYGON ((14129470.918 4508229.718, 14129459.5... | 23755.7097 | 128762.123348 | -0.019590 | 0.000898 | 0.410626 | 2012320019107500002 | 21711.05 | 롯데쇼핑(주) 관악점 |
| 19211 | 1121052010001 | 보라매동 | 1121052 | POLYGON ((14129327.270 4507830.057, 14129333.2... | 29766.9029 | 281075.067367 | 0.043714 | 0.001970 | 0.277926 | 2012320019107500002 | 21711.05 | 롯데쇼핑(주) 관악점 |
# Visualize the result: choropleth of the best-store attractiveness per 집계구
# (quantile classification, 5 classes).
ax = gdf_jip3857m5.plot(column='attract', scheme='quantiles', k=5, cmap='Oranges', figsize=(15, 15), alpha=0.7)
ctx.add_basemap(ax, source=ctx.sources.ST_TONER_LITE) # basemap source: ST_TONER_LITE
ax.set_axis_off()
plt.show()
<ipython-input-196-7ba5bc74e375>:3: FutureWarning: The "contextily.tile_providers" module is deprecated and will be removed in contextily v1.1. Please use "contextily.providers" instead. ctx.add_basemap(ax, source=ctx.sources.ST_TONER_LITE) #ctx.sources.ST_TONER_LITE
# Save the result layer as a shapefile (attribute text encoded as EUC-KR).
gdf_jip3857m5.to_file('data/gdf_jip3857m5.shp', encoding='euc-kr')
<ipython-input-197-572163a92dbe>:2: UserWarning: Column names longer than 10 characters will be truncated when saved to ESRI Shapefile.
gdf_jip3857m5.to_file('data/gdf_jip3857m5.shp', encoding='euc-kr')
# Dissolve the 집계구 polygons by store name (mart_nm), merging all areas
# assigned to the same store into one geometry, and save the result.
gdf_jip3857_mart = gdf_jip3857m5.dissolve(by='mart_nm')
gdf_jip3857_mart.to_file('data/gdf_jip3857_mart.shp', encoding='euc-kr')
<ipython-input-198-044dfc34b7bb>:3: UserWarning: Column names longer than 10 characters will be truncated when saved to ESRI Shapefile.
gdf_jip3857_mart.to_file('data/gdf_jip3857_mart.shp', encoding='euc-kr')
# Save the store point layers (EPSG:4326 and EPSG:3857 versions) as shapefiles.
gdf_mart4326.to_file('data/gdf_mart4326.shp', encoding='euc-kr')
gdf_mart3857.to_file('data/gdf_mart3857.shp', encoding='euc-kr')